home *** CD-ROM | disk | FTP | other *** search
/ SGI Freeware 2002 November / SGI Freeware 2002 November - Disc 2.iso / dist / fw_glimpse.idb / usr / freeware / src / glimpse-3.0 / agrep / bitap.c.z / bitap.c
C/C++ Source or Header  |  1997-09-09  |  13KB  |  440 lines

  1. /* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */
/* If the pattern is not a simple fixed pattern, then after preprocessing  */
/* and generating the masks, the program comes here. Four cases:           */
/* 1. the pattern is a simple regular expression and no errors are         */
/* allowed: do the matching here.  2. the pattern is a simple regular      */
/* expression and unit-cost errors are allowed: go to asearch().           */
/* 3. the pattern is a simple regular expression and the edit cost is      */
/* not uniform: go to asearch1().                                          */
/* 4. the pattern is a full regular expression: go to re() if M < 14,      */
/* else go to re1().                                                       */
/* Input parameters: old_D_pat: delimiter pattern;                         */
/* fd: input file descriptor; M: size of pattern; D: number of errors.     */
  13.  
#include "agrep.h"
#include "memory.h"
#include <string.h>
  16.  
/*
 * Everything below is only declared here; the definitions live in other
 * translation units.
 */

/* Running byte position; bitap() bumps it once per scanned byte. */
extern int CurrentByteOffset;
/*
 * Bit-parallel matcher tables.  In this file Mask[] is indexed by the
 * current input byte, Init[0] seeds the state words, and D_endpos /
 * endposition are tested against the state word.  Bit[] is not
 * referenced in the part of the file visible here.
 */
extern unsigned Init1, D_endpos, endposition, Init[], Mask[], Bit[];
/* Output caps; a value > 0 stops the search once that many matches are counted. */
extern int LIMITOUTPUT, LIMITPERFILE;
/* Option flags and delimiter length (D_length is set by bitap() from strlen). */
extern int DELIMITER, FILENAMEONLY, D_length, I, AND, REGEX, JUMP, INVERSE; 
extern char D_pattern[];
extern int TRUNCATE, DD, S;
/* Progname is used in error messages, CurrentFileName in FILENAMEONLY output. */
extern char Progname[], CurrentFileName[];
extern int num_of_matched, prev_num_of_matched;
extern int agrep_initialfd;
/* When zero, errors set errno and return -1; otherwise the process exits. */
extern int EXITONERROR;
/* In-memory input region (used when fd == -1). */
extern int agrep_inlen;
extern CHAR *agrep_inbuffer;
extern int agrep_inpointer;
/* In-memory output region, used when agrep_finalfp is NULL. */
extern CHAR *agrep_outbuffer;
extern int agrep_outlen;
extern int agrep_outpointer;
extern FILE *agrep_finalfp;
/*
 * NOTE(review): declaring errno by hand is not portable; <errno.h> is the
 * standard way to obtain it -- confirm whether agrep.h already includes it.
 */
extern int errno;

extern int NEW_FILE, POST_FILTER;
  37.  
  38. /* bitap dispatches job */
  39.  
  40. int
  41. bitap(old_D_pat, Pattern, fd, M, D)
  42. char old_D_pat[], *Pattern;  
  43. int fd, M, D;  
  44. {
  45.     char c;  
  46.     register unsigned r1, r2, r3, CMask, i;
  47.     register unsigned end, endpos, r_Init1;
  48.     register unsigned D_Mask;
  49.     int  ResidueSize , FIRSTROUND, lasti, print_end, j, num_read;
  50.     int  k;
  51.     CHAR *buffer;
  52.  
  53.     D_length = strlen(old_D_pat);
  54.     for(i=0; i<D_length; i++) if(old_D_pat[i] == '^' || old_D_pat[i] == '$')
  55.         old_D_pat[i] = '\n';
  56.     if (REGEX) { 
  57.         if (D > 4) {
  58.             fprintf(stderr, "%s: the maximum number of erorrs allowed for full regular expressions is 4\n", Progname);
  59.             if (!EXITONERROR) {
  60.                 errno = AGREP_ERROR;
  61.                 return -1;
  62.             }
  63.             else exit(2);
  64.         }
  65.         if (M <= SHORTREG) { 
  66.             return re(fd, M, D);   /* SUN: need to find a even point */
  67.         }
  68.         else { 
  69.             return re1(fd, M, D);
  70.         }
  71.     }   
  72.     if (D > 0 && JUMP == ON) 
  73.     { 
  74.         return asearch1(old_D_pat, fd, D); 
  75.     }
  76.     if (D > 0) 
  77.     { 
  78.         return asearch(old_D_pat, fd, D); 
  79.     }
  80.     if(I == 0) Init1 = (unsigned)037777777777;
  81.  
  82.     j=0;
  83.  
  84.     r_Init1 = Init1;
  85.     r1 = r2 = r3 = Init[0];
  86.     endpos = D_endpos;
  87.  
  88.     D_Mask = D_endpos;
  89.     for(i=1 ; i<D_length; i++) D_Mask = (D_Mask << 1) | D_Mask;
  90.     D_Mask = ~D_Mask;
  91.     FIRSTROUND = ON;
  92.  
  93. #if    AGREP_POINTER
  94.     if (fd != -1) {
  95. #endif    /*AGREP_POINTER*/
  96.         alloc_buf(fd, &buffer, Max_record+BlockSize+1);
  97.         buffer[Max_record-1] = '\n';
  98.         lasti = Max_record;
  99.         while ((num_read = fill_buf(fd, buffer + Max_record, BlockSize)) > 0)
  100.         {
  101.             i=Max_record; 
  102.             end = Max_record + num_read; 
  103.             if(FIRSTROUND) {  
  104.                 i = Max_record - 1 ;
  105.  
  106.                 if(DELIMITER) {
  107.                     for(k=0; k<D_length; k++) {
  108.                         if(old_D_pat[k] != buffer[Max_record+k]) break;
  109.                     }
  110.                     if(k>=D_length) j--;
  111.                 }
  112.  
  113.                 FIRSTROUND = OFF;  
  114.             }
  115.             if(num_read < BlockSize) {
  116.                 strncpy(buffer+Max_record+num_read, old_D_pat, D_length);
  117.                 end = end + D_length;
  118.                 buffer[end] = '\0';
  119.             }
  120.  
  121.             /* BITAP_PROCESS: the while-loop below */
  122.             while (i < end)
  123.             {
  124.                 c = buffer[i++];
  125.                 CurrentByteOffset ++;
  126.                 CMask = Mask[c];
  127.                 r1 = r_Init1 & r3;
  128.                 r2 = (( r3 >> 1 ) & CMask) | r1;
  129.                 if ( r2 & endpos ) {
  130.                     j++;
  131.                     if (DELIMITER) CurrentByteOffset -= D_length;
  132.                     else CurrentByteOffset -= 1;
  133.                     if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE )
  134.                     { 
  135.                         if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
  136.                             num_of_matched++;
  137.  
  138.                             if (agrep_finalfp != NULL) 
  139.                                 fprintf(agrep_finalfp, "%s\n", CurrentFileName);
  140.                             else {
  141.                                 int outindex;
  142.                                 for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
  143.                                         (CurrentFileName[outindex] != '\0'); outindex++) {
  144.                                     agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
  145.                                 }
  146.                                 if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
  147.                                     OUTPUT_OVERFLOW;
  148.                                     free_buf(fd, buffer);
  149.                                     return -1;
  150.                                 }
  151.                                 else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
  152.                                 agrep_outpointer += outindex;
  153.                             }
  154.  
  155.                             free_buf(fd, buffer);
  156.                             NEW_FILE = OFF;
  157.                             return 0; 
  158.                         }
  159.  
  160.                         print_end = i - D_length - 1;
  161.                         if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
  162.                             if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} 
  163.                         if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
  164.                             ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
  165.                             free_buf(fd, buffer);
  166.                             return 0;    /* done */
  167.                         }
  168.                     }
  169.                     lasti = i - D_length; 
  170.                     TRUNCATE = OFF;
  171.                     r2 = r3 = r1 = Init[0];
  172.                     r1 = r_Init1 & r3;
  173.                     r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
  174.                     if (DELIMITER) CurrentByteOffset += 1*D_length;
  175.                     else CurrentByteOffset += 1*1;
  176.                 }
  177.                 c = buffer[i++];
  178.                 CurrentByteOffset ++;
  179.                 CMask = Mask[c];
  180.                 r1 = r_Init1 & r2;
  181.                 r3 = (( r2 >> 1 ) & CMask) | r1; 
  182.                 if ( r3 & endpos ) {
  183.                     j++;
  184.                     if (DELIMITER) CurrentByteOffset -= D_length;
  185.                     else CurrentByteOffset -= 1;
  186.                     if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE )
  187.                     { 
  188.                         if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
  189.                             num_of_matched++;
  190.  
  191.                             if (agrep_finalfp != NULL) 
  192.                                 fprintf(agrep_finalfp, "%s\n", CurrentFileName);
  193.                             else {
  194.                                 int outindex;
  195.                                 for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
  196.                                         (CurrentFileName[outindex] != '\0'); outindex++) {
  197.                                     agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
  198.                                 }
  199.                                 if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
  200.                                     OUTPUT_OVERFLOW;
  201.                                     free_buf(fd, buffer);
  202.                                     return -1;
  203.                                 }
  204.                                 else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
  205.                                 agrep_outpointer += outindex;
  206.                             }
  207.  
  208.                             free_buf(fd, buffer);
  209.                             NEW_FILE = OFF;
  210.                             return 0; 
  211.                         }
  212.  
  213.                         print_end = i - D_length - 1;
  214.                         if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
  215.                             if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
  216.                         if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
  217.                             ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
  218.                             free_buf(fd, buffer);
  219.                             return 0;    /* done */
  220.                         }
  221.                     }
  222.                     lasti = i - D_length ;
  223.                     TRUNCATE = OFF;
  224.                     r2 = r3 = r1 = Init[0]; 
  225.                     r1 = r_Init1 & r2;
  226.                     r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
  227.                     if (DELIMITER) CurrentByteOffset += 1*D_length;
  228.                     else CurrentByteOffset += 1*1;
  229.                 }   
  230.             }
  231.  
  232.             ResidueSize = num_read + Max_record - lasti;
  233.             if(ResidueSize > Max_record) {
  234.                 ResidueSize = Max_record;
  235.                 TRUNCATE = ON;   
  236.             }
  237.             strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize);
  238.             lasti = Max_record - ResidueSize;
  239.             if(lasti < 0) {
  240.                 lasti = 1;
  241.             } 
  242.             if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
  243.                 ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
  244.                 free_buf(fd, buffer);
  245.                 return 0;    /* done */
  246.             }
  247.         }
  248.         free_buf(fd, buffer);
  249.         return 0;
  250. #if    AGREP_POINTER
  251.     }
  252.     else {
  253.         buffer = agrep_inbuffer;
  254.         num_read = agrep_inlen;
  255.         end = num_read;
  256.         /* buffer[end-1] = '\n';*/ /* at end of the text. */
  257.         /* buffer[0] = '\n';*/  /* in front of the  text. */
  258.         i = 0;
  259.         lasti = 1;
  260.  
  261.         if(DELIMITER) {
  262.             for(k=0; k<D_length; k++) {
  263.                 if(old_D_pat[k] != buffer[k]) break;
  264.             }
  265.             if(k>=D_length) j--;
  266.         }
  267.  
  268.             /* An exact copy of the above: BITAP_PROCESS: the while-loop below */
  269.             while (i < end)
  270.             {
  271.                 c = buffer[i++];
  272.                 CurrentByteOffset ++;
  273.                 CMask = Mask[c];
  274.                 r1 = r_Init1 & r3;
  275.                 r2 = (( r3 >> 1 ) & CMask) | r1;
  276.                 if ( r2 & endpos ) {
  277.                     j++;
  278.                     if (DELIMITER) CurrentByteOffset -= D_length;
  279.                     else CurrentByteOffset -= 1;
  280.                     if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE )
  281.                     { 
  282.                         if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
  283.                             num_of_matched++;
  284.  
  285.                             if (agrep_finalfp != NULL) 
  286.                                 fprintf(agrep_finalfp, "%s\n", CurrentFileName);
  287.                             else {
  288.                                 int outindex;
  289.                                 for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
  290.                                         (CurrentFileName[outindex] != '\0'); outindex++) {
  291.                                     agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
  292.                                 }
  293.                                 if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
  294.                                     OUTPUT_OVERFLOW;
  295.                                     free_buf(fd, buffer);
  296.                                     return -1;
  297.                                 }
  298.                                 else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
  299.                                 agrep_outpointer += outindex;
  300.                             }
  301.  
  302.                             free_buf(fd, buffer);
  303.                             NEW_FILE = OFF;
  304.                             return 0; 
  305.                         }
  306.  
  307.                         print_end = i - D_length - 1;
  308.                         if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
  309.                             if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} 
  310.                         if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
  311.                             ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
  312.                             free_buf(fd, buffer);
  313.                             return 0;    /* done */
  314.                         }
  315.                     }
  316.                     lasti = i - D_length; 
  317.                     TRUNCATE = OFF;
  318.                     r2 = r3 = r1 = Init[0];
  319.                     r1 = r_Init1 & r3;
  320.                     r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
  321.                     if (DELIMITER) CurrentByteOffset += 1*D_length;
  322.                     else CurrentByteOffset += 1*1;
  323.                 }
  324.                 c = buffer[i++];
  325.                 CurrentByteOffset ++;
  326.                 CMask = Mask[c];
  327.                 r1 = r_Init1 & r2;
  328.                 r3 = (( r2 >> 1 ) & CMask) | r1; 
  329.                 if ( r3 & endpos ) {
  330.                     j++;
  331.                     if (DELIMITER) CurrentByteOffset -= D_length;
  332.                     else CurrentByteOffset -= 1;
  333.                     if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE )
  334.                     { 
  335.                         if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
  336.                             num_of_matched++;
  337.  
  338.                             if (agrep_finalfp != NULL) 
  339.                                 fprintf(agrep_finalfp, "%s\n", CurrentFileName);
  340.                             else {
  341.                                 int outindex;
  342.                                 for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
  343.                                         (CurrentFileName[outindex] != '\0'); outindex++) {
  344.                                     agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
  345.                                 }
  346.                                 if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
  347.                                     OUTPUT_OVERFLOW;
  348.                                     free_buf(fd, buffer);
  349.                                     return -1;
  350.                                 }
  351.                                 else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
  352.                                 agrep_outpointer += outindex;
  353.                             }
  354.  
  355.                             free_buf(fd, buffer);
  356.                             NEW_FILE = OFF;
  357.                             return 0; 
  358.                         }
  359.  
  360.                         print_end = i - D_length - 1;
  361.                         if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
  362.                             if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
  363.                         if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
  364.                             ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
  365.                             free_buf(fd, buffer);
  366.                             return 0;    /* done */
  367.                         }
  368.                     }
  369.                     lasti = i - D_length ;
  370.                     TRUNCATE = OFF;
  371.                     r2 = r3 = r1 = Init[0]; 
  372.                     r1 = r_Init1 & r2;
  373.                     r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
  374.                     if (DELIMITER) CurrentByteOffset += 1*D_length;
  375.                     else CurrentByteOffset += 1*1;
  376.                 }   
  377.             }
  378.  
  379.         return 0;
  380.     }
  381. #endif    /*AGREP_POINTER*/
  382. }
  383.  
  384. fill_buf(fd, buf, record_size)
  385. int fd, record_size; 
  386. unsigned char *buf;
  387. {
  388.     int num_read=1;
  389.     int total_read=0;
  390.     extern int glimpse_clientdied;
  391.  
  392.     if (fd >= 0) {
  393.         while(total_read < record_size && num_read > 0) {
  394.             if (glimpse_clientdied) return 0;
  395.             num_read = read(fd, buf+total_read, record_size - total_read);
  396.             total_read = total_read + num_read;
  397.         }
  398.     }
  399. #if    AGREP_POINTER
  400.     else return 0;    /* should not call this function if buffer is a pointer to a user-specified region! */
  401. #else    /*AGREP_POINTER*/
  402.     else {    /* simulate a file */
  403.         total_read = (record_size > (agrep_inlen - agrep_inpointer)) ? (agrep_len - agrep_inpointer) : record_size;
  404.         memcpy(buf, agrep_inbuffer + agrep_inpointer, total_read);
  405.         agrep_inpointer += total_read;
  406.         /* printf("agrep_inpointer %d total_read %d\n", agrep_inpointer, total_read);*/
  407.     }
  408. #endif    /*AGREP_POINTER*/
  409.     if (glimpse_clientdied) return 0;
  410.     return(total_read);
  411. }
  412.  
  413. /*
  414.  * In these functions no allocs/copying is done when
  415.  * fd == -1, i.e., agrep is called to search within memory.
  416.  */
  417.  
  418. void
  419. alloc_buf(fd, buf, size)
  420.     int fd;
  421.     char **buf;
  422.     int size;
  423. {
  424. #if    AGREP_POINTER
  425.     if (fd != -1)
  426. #endif    /*AGREP_POINTER*/
  427.         *buf = (char *)malloc(size);
  428. }
  429.  
  430. void
  431. free_buf(fd, buf)
  432.     int fd;
  433.     char *buf;
  434. {
  435. #if    AGREP_POINTER
  436.     if (fd != -1)
  437. #endif    /*AGREP_POINTER*/
  438.         free(buf);
  439. }
  440.